<!DOCTYPE html>



  


<html class="theme-next gemini use-motion" lang="zh-CN">
<head><meta name="generator" content="Hexo 3.8.0">
  <meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform">
<meta http-equiv="Cache-Control" content="no-siteapp">
















  
  
  <link href="/hcigmoid/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css">




  
  
  
  

  
    
    
  

  
    
      
    

    
  

  

  

  

  
    
    
    <link href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic|Lato:300,300italic,400,400italic,700,700italic&subset=latin,latin-ext" rel="stylesheet" type="text/css">
  






<link href="/hcigmoid/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css">

<link href="/hcigmoid/css/main.css?v=5.1.4" rel="stylesheet" type="text/css">


  <link rel="apple-touch-icon" sizes="180x180" href="/hcigmoid/images/apple-touch-icon-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="32x32" href="/hcigmoid/images/favicon-32x32-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="16x16" href="/hcigmoid/images/favicon-16x16-next.png?v=5.1.4">


  <link rel="mask-icon" href="/hcigmoid/images/logo.svg?v=5.1.4" color="#222">





  <meta name="keywords" content="推荐,协同过滤,算法,召回,矩阵分解">





  <link rel="alternate" href="/hcigmoid/atom.xml" title="HCigmoid" type="application/atom+xml">






<meta name="description" content="论文引用：Koren, Y. . (2008). Factorization meets the neighborhood : a multifaceted collaborative filtering model. Proceedings of the 14th ACM SIGKDD International Conference of Knowledge Discovery and Da">
<meta name="keywords" content="推荐,协同过滤,算法,召回,矩阵分解">
<meta property="og:type" content="article">
<meta property="og:title" content="SVD++ 论文精读">
<meta property="og:url" content="http://guyuecanhui.gitee.io/hcigmoid/2018/02/24/svdpp%202/index.html">
<meta property="og:site_name" content="HCigmoid">
<meta property="og:description" content="论文引用：Koren, Y. . (2008). Factorization meets the neighborhood : a multifaceted collaborative filtering model. Proceedings of the 14th ACM SIGKDD International Conference of Knowledge Discovery and Da">
<meta property="og:locale" content="zh_CN">
<meta property="og:updated_time" content="2020-04-30T16:47:10.000Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="SVD++ 论文精读">
<meta name="twitter:description" content="论文引用：Koren, Y. . (2008). Factorization meets the neighborhood : a multifaceted collaborative filtering model. Proceedings of the 14th ACM SIGKDD International Conference of Knowledge Discovery and Da">



<script type="text/javascript" id="hexo.configurations">
  // Shared theme namespace: keep an existing window.NexT if one is already defined,
  // otherwise start from an empty object.
  var NexT = window.NexT || {};

  // Page-level settings object exposed as a global.
  // NOTE(review): presumably read by the theme scripts loaded further down the page — confirm.
  var CONFIG = {
    root: '/hcigmoid/',
    scheme: 'Gemini',
    version: '5.1.4',
    sidebar: {
      position: 'left',
      display: 'post',
      offset: 12,
      b2t: false,
      scrollpercent: false,
      onmobile: false
    },
    fancybox: true,
    tabs: true,
    motion: {
      enable: true,
      async: false,
      transition: {
        post_block: 'fadeIn',
        post_header: 'slideDownIn',
        post_body: 'slideDownIn',
        coll_header: 'slideLeftIn',
        sidebar: 'slideUpIn'
      }
    },
    duoshuo: {
      userId: '0',
      author: '博主'
    },
    // All Algolia credentials are empty strings on this page.
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: { per_page: 10 },
      labels: {
        input_placeholder: "Search for Posts",
        // ${query}, ${hits} and ${time} are literal placeholder text here, not template-literal substitutions.
        hits_empty: "We didn't find any results for the search: ${query}",
        hits_stats: "${hits} results found in ${time} ms"
      }
    }
  };
</script>



  <link rel="canonical" href="http://guyuecanhui.gitee.io/hcigmoid/2018/02/24/svdpp%202/">





  <title>SVD++ 论文精读 | HCigmoid</title>
  








</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/hcigmoid/" class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">HCigmoid</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <p class="site-subtitle">Watch, learn and practise</p>
      
  </div>

  <div class="site-nav-toggle">
    <button>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/hcigmoid/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br>
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-about">
          <a href="/hcigmoid/about/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-user"></i> <br>
            
            关于
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/hcigmoid/tags/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br>
            
            标签
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/hcigmoid/categories/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br>
            
            分类
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/hcigmoid/archives/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br>
            
            归档
          </a>
        </li>
      

      
        <li class="menu-item menu-item-search">
          
            <a href="javascript:;" class="popup-trigger">
          
            
              <i class="menu-item-icon fa fa-search fa-fw"></i> <br>
            
            搜索
          </a>
        </li>
      
    </ul>
  

  
    <div class="site-search">
      
  <div class="popup search-popup local-search-popup">
  <div class="local-search-header clearfix">
    <span class="search-icon">
      <i class="fa fa-search"></i>
    </span>
    <span class="popup-btn-close">
      <i class="fa fa-times-circle"></i>
    </span>
    <div class="local-search-input-wrapper">
      <input autocomplete="off" placeholder="搜索..." spellcheck="false" type="text" id="local-search-input">
    </div>
  </div>
  <div id="local-search-result"></div>
</div>



    </div>
  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://guyuecanhui.gitee.io/hcigmoid/2018/02/24/svdpp%202/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="古月残辉">
      <meta itemprop="description" content>
      <meta itemprop="image" content="/hcigmoid/images/avatar.gif">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="HCigmoid">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">SVD++ 论文精读</h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-02-24T20:16:34+08:00">
                2018-02-24
              </time>
            

            

            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/hcigmoid/categories/论文精读/" itemprop="url" rel="index">
                    <span itemprop="name">论文精读</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/hcigmoid/2018/02/24/svdpp 2/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count valine-comment-count" data-xid="/hcigmoid/2018/02/24/svdpp 2/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          
            <span class="post-meta-divider">|</span>
            <span class="page-pv"><i class="fa fa-file-o"></i> 阅读数
            <span class="busuanzi-value" id="busuanzi_value_page_pv"></span>
            </span>
          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  2.9k
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  12
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <blockquote>
<p>论文引用：Koren, Y. . (2008). Factorization meets the neighborhood : a multifaceted collaborative filtering model. Proceedings of the 14th ACM SIGKDD International Conference of Knowledge Discovery and Data Mining, 2008. ACM Press.</p>
</blockquote>
<p>本文对协同过滤中最主要的两种方法（基于邻域的方法和基于隐特征模型的方法）分别提出了优化方案，并且设计了一个联合模型将两种方法统一，从而达到更好的效果。为了进行区分，本文将对 SVD 进行优化的方案称为 SVD+，将联合模型的方法称为 SVD++。</p>
<h1 id="研究背景"><a href="#研究背景" class="headerlink" title="研究背景"></a>研究背景</h1><p>Koren 在做 Netflix 的比赛过程中，发现基于邻域的方法和基于隐特征模型的方法各有所长：</p>
<div class="table-container">
<table>
<thead>
<tr>
<th>比较</th>
<th>基于邻域的方法</th>
<th>基于隐特征模型的方法</th>
</tr>
</thead>
<tbody>
<tr>
<td>主要思想</td>
<td>核心在于计算用户/物品的相似度，将相似用户的喜好推荐给用户，或将与用户喜欢的物品相似的物品推荐给用户</td>
<td>假设真正描述用户评分矩阵性质的内在特征（可能未知）其实只有少数几个，将用户和物品都映射到这些隐特征层，从而使得用户和物品直接关联起来</td>
</tr>
<tr>
<td>挖掘信息特征</td>
<td>能够对局部强相关的关系更敏感，而无法捕捉全局弱相关的关系</td>
<td>能够估计关联所有物品/用户的整体结构，但是难以反映局部强相关的关系</td>
</tr>
</tbody>
</table>
</div>
<p>因此，这两种方法存在天然的互补关系。另外，Koren 还发现，使用隐式反馈的数据能够提高推荐的准确性，而这两种方法都不支持使用隐式反馈的数据。基于这些发现，Koren 先分别将隐式反馈集成到两个模型中去，得到两个优化的模型，再提出一种联合模型，将这两个优化的模型进一步融合，从而得到更好的效果。</p>
<h1 id="模型推导"><a href="#模型推导" class="headerlink" title="模型推导"></a>模型推导</h1><p>文章从 Baseline 的模型，通过加入各种考虑因素，推导出基于邻域和基于隐特征的两个模型，再推导出联合模型。</p>
<h2 id="1、Baseline模型"><a href="#1、Baseline模型" class="headerlink" title="1、Baseline模型"></a>1、Baseline模型</h2><p>Baseline 模型就是基于历史数据的简单统计，主要看用户 $u$ 的平均评分 $b_u$、电影 $i$ 的平均评分 $b_i$ 和所有电影的平均评分 $\mu$：</p>
<script type="math/tex; mode=display">
b_{ui} = \mu + b_u + b_i</script><p>所有后面的模型都是对这个基准模型的修正。这个基准模型中的参数都是可以离线计算的，用的方法也是本文通用的参数估计方法，先定义损失函数 $l(P)$：</p>
<script type="math/tex; mode=display">
l(p_1,p_2,\cdots) = \sum_{(u,i)\in \kappa} (r_{ui} - \hat{r_{ui}})^2 + \lambda(\sum_{p_1} p_1^2 + \sum_{p_2} p_2^2 + \cdots)</script><p>其中，<script type="math/tex">P=\{p_1,p_2,\cdots\}</script> 表示待估计的参数，$\kappa$ 表示所有显式反馈的组合（即用户 $u$ 对物品 $i$ 进行过评分），<script type="math/tex">r_{ui}</script> 表示评分的实际值，<script type="math/tex">\hat{r_{ui}}</script> 表示评分的预测值，$\lambda$ 为超参，根据经验设置，然后求最小化 $l(P)$ 下各参数的值，通常使用最小二乘法，或者文中使用的梯度下降法（效率更高）。比如这个地方，参数就 $b_u$ 和 $b_i$，可以根据下式进行参数估计：</p>
<script type="math/tex; mode=display">
\min_{b_*}\sum_{(u,i)\in \kappa} (r_{ui} - \mu - b_u - b_i)^2 + \lambda_1(\sum_u b_u^2 + \sum_i b_i^2)</script><h2 id="2、推广到基于邻域的模型"><a href="#2、推广到基于邻域的模型" class="headerlink" title="2、推广到基于邻域的模型"></a>2、推广到基于邻域的模型</h2><p>本文主要考虑 ItemCF，对于两个物品 $i$ 和 $j$，它们的相似性 <script type="math/tex">s_{ij}</script> 是基于 Pearson 相关系数 <script type="math/tex">\rho_{ij}</script> 计算得到：</p>
<script type="math/tex; mode=display">
s_{ij} = \frac{n_{ij}}{n_{ij}+\lambda_2}\rho_{ij}, \\ \rho_{ij}=\frac{E((x-\mu_x)(y-\mu_y))}{\sigma_x\sigma_y}</script><p>其中，$n_{ij}$ 表示同时对 $i$ 和 $j$ 进行评分的用户数，$\lambda_2$ 应该是防止 $i$ 和 $j$ 比较冷门的情况下，恰好有个别用户同时对它们进行了评分，这时候它们的相关性实际是看不出来的，属于偶然情况，通常 $\lambda_2=100$。之前的 ItemCF 进一步利用用户 $u$ 评过分的与 $i$ 最相关的 $k$ 个物品 $S^k(i;u)$ 来估计用户 $u$ 对 $i$ 的评分：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + \frac{\sum_{j\in S^k(i;u)} s_{ij}(r_{uj} - b_{uj})}{\sum_{j\in S^k(i;u)} s_{ij}}</script><p>但是如果 $u$ 没有对与 $i$ 相似的物品评过分，那上式就主要取决于 <script type="math/tex">b_{ui}</script> 了。为了解决这个小问题，有方案先计算插值权重 $\theta_{ij}^u$ 来取代实际的评分：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + \sum_{j\in S^k(i;u)} \theta_{ij}^u (r_{uj} - b_{uj})</script><p>但是以上模型都只考虑了用户 $u$，而对全局结构没有一个很好的理解，因此 Koren 提出不仅仅使用用户 $u$ 的对 $i$ 最相关的 $k$ 个物品的评分数据，而是使用所有 $u$ 的评分数据，因此引入一个参数 <script type="math/tex">\omega_{ij}</script> 来表示 $j$ 的评分对 $i$ 评分的影响，并且这个 $\omega_{ij}$ 是基于所有用户对 $i$ 和 $j$ 评分估计出来的：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + \sum_{j\in R(u)} (r_{uj} - b_{uj})\omega_{ij}</script><p>分析这个式子，当 $i$ 和 $j$ 越相关，说明 $j$ 对 $i$ 的影响越大，即 <script type="math/tex">\omega_{ij}</script> 越大，这时候如果 <script type="math/tex">(r_{uj} - b_{uj})</script> 较大，则估计的评分相对于 <script type="math/tex">b_{ui}</script> 的偏移也就越多；反之，当 <script type="math/tex">\omega_{ij}</script> 较小时，无论 $j$ 的评分如何都对偏移影响不大。</p>
<p>在此基础上，进一步引入隐式反馈的数据：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + \sum_{j\in R(u)} (r_{uj} - b_{uj})\omega_{ij} +\sum_{j\in N(u)} c_{ij}</script><p>其中，<script type="math/tex">c_{ij}</script> 表示隐式反馈对基准估计的偏移影响，当 $j$ 与 $i$ 的评分强相关时，$c_{ij}$ 较大。这个式子的主要问题是，它对重度用户的推荐和对轻度用户的推荐结果相差较大，因为重度用户的显式反馈和隐式反馈都很多，因此偏移项值较大。Koren 发现，做一下归一化以后，效果会更好：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + \mid R(u)\mid ^{-1/2}\sum_{j\in R(u)} (r_{uj} - b_{uj})\omega_{ij} +\mid N(u)\mid ^{-1/2}\sum_{j\in N(u)} c_{ij}</script><p>为了降低上式的计算复杂度，可以只考虑对 $i$ 影响最大的 $k$ 个物品，记 $R^k(i;u)=R(u)\cap S^k(i)$ 表示 $u$ 评分过的物品中属于 $i$ 最相似的 TopK 物品，类似的，记 $N^k(i;u)=N(u)\cap S^k(i)$，这两个集合的元素个数通常是小于 $k$ 的（而如果 $u$ 对至少 $k$ 个物品评过分的话，$\mid S^k(i;u)\mid = k$）。则最终的模型为：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + \mid R^k(i;u)\mid ^{-1/2}\sum_{j\in R^k(i;u)} (r_{uj} - b_{uj})\omega_{ij} +\mid N^k(i;u)\mid ^{-1/2}\sum_{j\in N^k(i;u)} c_{ij}</script><p>使用之前提到的最小化 <script type="math/tex">f(b_u, b_i, \omega_{ij}, c_{ij})</script> 的方法来估计这些参数的取值。记 <script type="math/tex">e_{ui}=r_{ui} - \hat{r_{ui}}</script>，则使用梯度下降法得到的迭代公式如下：</p>
<script type="math/tex; mode=display">
\begin{cases}
b_u \leftarrow b_u+\gamma\cdot (e_{ui} - \lambda_4\cdot b_u) \\
b_i \leftarrow b_i+\gamma\cdot (e_{ui} - \lambda_4\cdot b_i) \\
\omega_{ij} \leftarrow \omega_{ij} + \gamma\cdot(\mid R^k(i;u)\mid ^{-1/2}\cdot e_{ui}\cdot (r_{uj} - b_{uj})-\lambda_4\cdot \omega_{ij}), \forall j \in R^k(i;u) \\
c_{ij} \leftarrow c_{ij} + \gamma\cdot(\mid N^k(i;u)\mid ^{-1/2}\cdot e_{ui}-\lambda_4\cdot c_{ij}), \forall j \in N^k(i;u)
\end{cases}</script><p>对于 Netflix 数据集，Koren 推荐取 $\gamma=0.005$，$\lambda_4=0.002$，对所有数据集进行 15 轮训练。从实际效果来看 $k$ 越大，推荐的效果越好。这个模型的计算主要集中在参数训练上，一旦模型训练出来了，就可以快速的进行在线的预测。</p>
<h2 id="3、推广到基于隐特征的模型"><a href="#3、推广到基于隐特征的模型" class="headerlink" title="3、推广到基于隐特征的模型"></a>3、推广到基于隐特征的模型</h2><p>原始的 SVD 是将用户和物品映射到一个隐特征集合：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + p_u^T\cdot q_i</script><p>由于用户的规模通常远大于物品的规模，因此考虑用 $u$ 喜欢的物品来对 $u$ 进行建模，再加上隐式反馈的数据，可以得到 Asymmetric-SVD 模型：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + q_i^T(\mid R(u)\mid ^{-1/2}\sum_{j\in R(u)} (r_{uj} - b_{uj})x_j +\mid N(u)\mid ^{-1/2}\sum_{j\in N(u)} y_j)</script><p>其中，<script type="math/tex">x_j</script> 和 <script type="math/tex">y_j</script> 是用来控制显式反馈和隐式反馈重要性比例的参数。用最小化 <script type="math/tex">f(b_u,b_i,q_i,x_j,y_j)</script> 来估计这些参数值。由于这里用 <script type="math/tex">(r_{uj} - b_{uj})x_j</script> 来替代原来的用户隐特征，因此数据量少了很多。该模型具有比较好的可解释性，并且对于新用户来讲，只要他做了一些反馈，即更新了 <script type="math/tex">r_{uj}</script> 后，就可以立即算出估计值；但是如果新上线一个物品，由于 <script type="math/tex">q_i^T</script> 需要重新估计，因此对新物品的冷启动需要一定的反应时间。</p>
<p>如果对于计算不是很 care 的话，当然可以不用这种简化处理，还是对用户直接进行建模（$p_u$），这样的效果会更好一些，但是可解释性之类的就要差一些：</p>
<script type="math/tex; mode=display">
\hat{r_{ui}} = b_{ui} + q_i^T(p_u +\mid N(u)\mid ^{-1/2}\sum_{j\in N(u)} y_j)</script><h2 id="4、联合模型"><a href="#4、联合模型" class="headerlink" title="4、联合模型"></a>4、联合模型</h2><p>如果把上面两个模型看成是 <code>预测值=基准估计+偏移量</code> 的话，那么这两个模型就可以混合到一起，变成：</p>
<script type="math/tex; mode=display">
\begin{align}
\hat{r_{ui}} &= b_{ui} \\
&+ q_i^T(p_u +\mid N(u)\mid ^{-1/2}\sum_{j\in N(u)} y_j) \\
&+ \mid R^k(i;u)\mid ^{-1/2}\sum_{j\in R^k(i;u)} (r_{uj} - b_{uj})\omega_{ij} +\mid N^k(i;u)\mid ^{-1/2}\sum_{j\in N^k(i;u)} c_{ij}
\end{align}</script><p>其中，第一项为基准估计；第二项刻画用户画像与物品画像之间的交互，例如它可能发现电影 “The Sixth Sense” 和用户 Joe 在“心理惊悚片”这一维度上的得分都很高；第三项提供难以用画像刻画的细粒度调整，例如 Joe 给相关电影 “Signs” 打了低分这一事实。</p>
<p>使用梯度下降法得到的迭代公式如下：</p>
<script type="math/tex; mode=display">
\begin{cases}
b_u \leftarrow b_u+\gamma_1\cdot (e_{ui} - \lambda_6\cdot b_u) \\
b_i \leftarrow b_i+\gamma_1\cdot (e_{ui} - \lambda_6\cdot b_i) \\
q_i \leftarrow q_i+ \gamma_2\cdot(e_{ui}\cdot(p_u+\mid N(u)\mid ^{-1/2}\sum_{j\in N(u)} y_j)-\lambda_7\cdot q_i) \\
p_u \leftarrow p_u + \gamma_2\cdot(e_{ui}\cdot q_i - \lambda_7\cdot p_u) \\
y_j \leftarrow y_j+\gamma_2\cdot(e_{ui} \cdot\mid N(u)\mid ^{-1/2} \cdot q_i - \lambda_7\cdot y_j) \\
\omega_{ij} \leftarrow \omega_{ij} + \gamma_3\cdot(\mid R^k(i;u)\mid ^{-1/2}\cdot e_{ui}\cdot (r_{uj} - b_{uj})-\lambda_8\cdot \omega_{ij}),\ \forall j \in R^k(i;u) \\
c_{ij} \leftarrow c_{ij} + \gamma_3\cdot(\mid N^k(i;u)\mid ^{-1/2}\cdot e_{ui}-\lambda_8\cdot c_{ij}),\ \forall j \in N^k(i;u)
\end{cases}</script><p>在 Netflix 的数据集上，建议参数为 <script type="math/tex">\gamma_1=\gamma_2=0.007</script>，<script type="math/tex">\gamma_3=0.001</script>，<script type="math/tex">\lambda_6=0.005</script>，<script type="math/tex">\lambda_7=\lambda_8=0.015</script>，整体迭代约 30 轮收敛，每一轮训练时，可以将 <script type="math/tex">\gamma_*</script> 减少 10%。而 $k=300$，再大也不会有明显的性能提升。</p>
<p>最后，Koren 还设计了一个比较巧妙的实验，解答了我一直以来一个疑问：RMSE 的提升是否也意味着推荐效果的提升。他们设计了一个针对 TopN 推荐的测试，主要的思想是先找出所有 5-star 的评分，认为这些评分意味着该用户喜欢这部电影，然后对所有这些 $(u,i)$，随机再选 1000 部电影，估计 $u$ 对这些电影的评分，看用户对这些电影里所有的 5-star 电影排名情况，然后对不同的算法进行比较，发现 RMSE 越小的算法，将 5-star 排到前面的概率也越大，从而说明了在这种情况下，RMSE 的提升也意味着推荐效果的提升。</p>

      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/hcigmoid/tags/推荐/" rel="tag"># 推荐</a>
          
            <a href="/hcigmoid/tags/协同过滤/" rel="tag"># 协同过滤</a>
          
            <a href="/hcigmoid/tags/算法/" rel="tag"># 算法</a>
          
            <a href="/hcigmoid/tags/召回/" rel="tag"># 召回</a>
          
            <a href="/hcigmoid/tags/矩阵分解/" rel="tag"># 矩阵分解</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/hcigmoid/2018/02/24/svdpp/" rel="next" title="SVD++ 论文精读">
                <i class="fa fa-chevron-left"></i> SVD++ 论文精读
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/hcigmoid/2018/07/17/method-vs-function 2/" rel="prev" title="Scala 中函数与方法的差别">
                Scala 中函数与方法的差别 <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          


          

  
    <div class="comments" id="comments">
    </div>
  



        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview-wrap">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview-wrap sidebar-panel">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <p class="site-author-name" itemprop="name">古月残辉</p>
              <p class="site-description motion-element" itemprop="description">总结心得</p>
          </div>

          <nav class="site-state motion-element">

            
              <div class="site-state-item site-state-posts">
              
                <a href="/hcigmoid/archives/">
              
                  <span class="site-state-item-count">63</span>
                  <span class="site-state-item-name">日志</span>
                </a>
              </div>
            

            
              
              
              <div class="site-state-item site-state-categories">
                <a href="/hcigmoid/categories/index.html">
                  <span class="site-state-item-count">6</span>
                  <span class="site-state-item-name">分类</span>
                </a>
              </div>
            

            
              
              
              <div class="site-state-item site-state-tags">
                <a href="/hcigmoid/tags/index.html">
                  <span class="site-state-item-count">73</span>
                  <span class="site-state-item-name">标签</span>
                </a>
              </div>
            

          </nav>

          
            <div class="feed-link motion-element">
              <a href="/hcigmoid/atom.xml" rel="alternate">
                <i class="fa fa-rss"></i>
                RSS
              </a>
            </div>
          

          
            <div class="links-of-author motion-element">
                
                  <span class="links-of-author-item">
                    <a href="mailto:guyuecanhui@icloud.com" target="_blank" title="E-Mail">
                      
                        <i class="fa fa-fw fa-envelope"></i>E-Mail</a>
                  </span>
                
            </div>
          

          
          

          
          

          

        </div>
      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link" href="#研究背景"><span class="nav-number">1.</span> <span class="nav-text">研究背景</span></a></li><li class="nav-item nav-level-1"><a class="nav-link" href="#模型推导"><span class="nav-number">2.</span> <span class="nav-text">模型推导</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#1、Baseline模型"><span class="nav-number">2.1.</span> <span class="nav-text">1、Baseline模型</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#2、推广到基于邻域的模型"><span class="nav-number">2.2.</span> <span class="nav-text">2、推广到基于邻域的模型</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#3、推广到基于隐特征的模型"><span class="nav-number">2.3.</span> <span class="nav-text">3、推广到基于隐特征的模型</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#4、联合模型"><span class="nav-number">2.4.</span> <span class="nav-text">4、联合模型</span></a></li></ol></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright">&copy; 2018 &mdash; <span itemprop="copyrightYear">2020</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">古月残辉</span>

  
    <span class="post-meta-divider">|</span>
    <span class="post-meta-item-icon">
      <i class="fa fa-area-chart"></i>
    </span>
    
      <span class="post-meta-item-text">Site words total count&#58;</span>
    
    <span title="Site words total count">109.2k</span>
  
</div>









<script async src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>

        
<div class="busuanzi-count">
  <!-- Site-wide visitor counters. The busuanzi script is loaded once, immediately
       before this block, from busuanzi.ibruce.info. The second copy that used to be
       loaded here from dn-lbstatics.qbox.me was removed: that domain has been retired
       by the service, and the same counter script does not need to be loaded twice. -->

  
    <span class="site-uv">
      <i class="fa fa-user"></i> 访问人数
      <span class="busuanzi-value" id="busuanzi_value_site_uv"></span>
      人
    </span>
  

  
    <span class="site-pv">
      <i class="fa fa-eye"></i> 总访问量
      <span class="busuanzi-value" id="busuanzi_value_site_pv"></span>
      次
    </span>
  
</div>








        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

    

  </div>

  

<script type="text/javascript">
  // Keep window.Promise only when its Object.prototype.toString tag is
  // '[object Function]'; any other value is replaced with null.
  // NOTE(review): presumably so later scripts treat a non-function Promise as absent — confirm.
  (function (root) {
    var promiseTag = Object.prototype.toString.call(root.Promise);
    if (promiseTag === '[object Function]') {
      return;
    }
    root.Promise = null;
  })(window);
</script>









  












  
  
    <script type="text/javascript" src="/hcigmoid/lib/jquery/index.js?v=2.1.3"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/velocity/velocity.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/hcigmoid/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>
  


  


  <script type="text/javascript" src="/hcigmoid/js/src/utils.js?v=5.1.4"></script>

  <script type="text/javascript" src="/hcigmoid/js/src/motion.js?v=5.1.4"></script>



  
  


  <script type="text/javascript" src="/hcigmoid/js/src/affix.js?v=5.1.4"></script>

  <script type="text/javascript" src="/hcigmoid/js/src/schemes/pisces.js?v=5.1.4"></script>



  
  <script type="text/javascript" src="/hcigmoid/js/src/scrollspy.js?v=5.1.4"></script>
<script type="text/javascript" src="/hcigmoid/js/src/post-details.js?v=5.1.4"></script>



  


  <script type="text/javascript" src="/hcigmoid/js/src/bootstrap.js?v=5.1.4"></script>



  


  




	





  





  










  <script src="//cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
  <script src="//unpkg.com/valine/dist/Valine.min.js"></script>
  
  <script type="text/javascript">
    // Set up the Valine comment widget inside the #comments container.

    // Whitelist of guest fields; entries of the configured list that are not
    // in this whitelist are dropped.
    var GUEST = ['nick','mail','link'];
    // Configured guest fields as a comma-separated string, turned into an array below.
    var guest = 'nick,mail,link';
    // Plain function expression instead of an arrow function, matching the ES5
    // style used by the other inline scripts on this page.
    guest = guest.split(',').filter(function (item) {
      return GUEST.indexOf(item) > -1;
    });

    // Valine.min.js is fetched from a third-party CDN. If it did not load,
    // skip initialisation instead of throwing a ReferenceError.
    if (typeof Valine === 'function') {
      new Valine({
        el: '#comments',
        verify: false,
        notify: false,
        appId: '6du4Ppc2TvUuhcccRHSDNH2v-gzGzoHsz',
        appKey: 'zOKNml4W1Bq3OTzEuLt5hUjI',
        placeholder: '感谢阅读！欢迎评论！',
        avatar: 'mm',
        guest_info: guest,
        // Comments per page, as a number. The previous `'10' || 10` always
        // evaluated to the string '10'.
        pageSize: 10
      });
    }
  </script>



  

  <script type="text/javascript">
    // Globals shared by the local-search popup code that follows.

    // Set to true by the search loader once the index request has succeeded.
    var isfetched = false;

    // Search index path relative to the site root; an empty value falls back
    // to "search.xml".
    var search_path = "search.xml";
    search_path = search_path.length === 0 ? "search.xml" : search_path;

    // The index is handled as XML unless its path ends in "json" (case-insensitive).
    var isXml = !/json$/i.test(search_path);

    // Full URL path of the search index.
    var path = "/hcigmoid/" + search_path;
    // monitor main search box;

    // Closes the search popup: removes the result list, the "no result" node and
    // the overlay, hides the popup, clears the query box, and resets the inline
    // `overflow` style on <body>.
    var onPopupClose = function (e) {
      ['.search-result-list', '#no-result', ".local-search-pop-overlay"].forEach(function (selector) {
        $(selector).remove();
      });
      $('.popup').hide();
      $('#local-search-input').val('');
      $('body').css('overflow', '');
    };

    // Opens the search popup: adds a dimming overlay to <body> and sets its
    // overflow to hidden, makes a click on the overlay close the popup, toggles
    // the popup's visibility, and focuses the search input.
    function proceedsearch() {
      var $body = $("body");
      $body.append('<div class="search-popup-overlay local-search-pop-overlay"></div>');
      $body.css('overflow', 'hidden');

      $('.search-popup-overlay').click(onPopupClose);
      $('.popup').toggle();

      // Sets autocapitalize and autocorrect on the input before focusing it.
      $('#local-search-input')
        .attr({ autocapitalize: "none", autocorrect: "off" })
        .focus();
    }

    /**
     * Fetch the local search index and wire the search box to it.
     *
     * While the request is in flight a spinner overlay is shown and page
     * scrolling is locked. On success the index is kept in memory, the input
     * element is hooked up so that typing re-renders the result list, the
     * spinner is removed and the popup is opened via proceedsearch(). On
     * failure the spinner is removed and scrolling is restored so the page
     * stays usable; `isfetched` is left untouched, so the next trigger retries.
     *
     * @param {string} path       URL of the search index handed to $.ajax
     *                            (parsed as XML or JSON depending on `isXml`).
     * @param {string} search_id  id of the text input that holds the query.
     * @param {string} content_id id of the element that receives the result HTML.
     */
    var searchFunc = function(path, search_id, content_id) {
      'use strict';

      // Remove the overlay(s) and give scrolling back to the page.
      function removeLoadingOverlay() {
        $(".local-search-pop-overlay").remove();
        $('body').css('overflow', '');
      }

      // Return every occurrence of `word` in `text` as {position, word}.
      // Occurrences do not overlap; an empty word yields no hits.
      function getIndexByWord(word, text, caseSensitive) {
        var wordLen = word.length;
        if (wordLen === 0) {
          return [];
        }
        var startPosition = 0, position, index = [];
        if (!caseSensitive) {
          text = text.toLowerCase();
          word = word.toLowerCase();
        }
        while ((position = text.indexOf(word, startPosition)) > -1) {
          index.push({position: position, word: word});
          startPosition = position + wordLen;
        }
        return index;
      }

      // Render text[slice.start, slice.end) with every hit wrapped in
      // <b class="search-keyword">. Hits are expected in ascending order.
      // NOTE(review): the text is inserted as HTML without escaping; a title
      // containing "<" or "&" will be parsed as markup - confirm this is
      // acceptable for the index being searched.
      function highlightKeyword(text, slice) {
        var result = '';
        var prevEnd = slice.start;
        slice.hits.forEach(function (hit) {
          result += text.substring(prevEnd, hit.position);
          var end = hit.position + hit.length;
          result += '<b class="search-keyword">' + text.substring(hit.position, end) + '</b>';
          prevEnd = end;
        });
        result += text.substring(prevEnd, slice.end);
        return result;
      }

      // start loading animation
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay">' +
          '<div id="search-loading-icon">' +
          '<i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>' +
          '</div>' +
          '</div>')
        .css('overflow', 'hidden');
      $("#search-loading-icon").css('margin', '20% auto 0 auto').css('text-align', 'center');

      $.ajax({
        url: path,
        dataType: isXml ? "xml" : "json",
        async: true,
        success: function(res) {
          // get the contents from search data
          isfetched = true;
          $('.popup').detach().appendTo('.header-inner');
          // XML index: one <entry> per article; JSON index: used as-is.
          var datas = isXml ? $("entry", res).map(function() {
            return {
              title: $("title", this).text(),
              content: $("content",this).text(),
              url: $("url" , this).text()
            };
          }).get() : res;
          var input = document.getElementById(search_id);
          var resultContent = document.getElementById(content_id);

          // Re-run the search for the current input value and render it.
          var inputEventFunction = function() {
            var searchText = input.value.trim().toLowerCase();
            // Split on whitespace / hyphens; for multi-word queries the whole
            // phrase is searched as an additional keyword.
            var keywords = searchText.split(/[\s\-]+/);
            if (keywords.length > 1) {
              keywords.push(searchText);
            }
            var resultItems = [];
            if (searchText.length > 0) {
              // perform local searching
              datas.forEach(function(data) {
                var isMatch = false;
                var hitCount = 0;
                var searchTextCount = 0;
                var title = data.title.trim();
                var titleInLowerCase = title.toLowerCase();
                // strip tags so only text is searched and shown
                var content = data.content.trim().replace(/<[^>]+>/g,"");
                var contentInLowerCase = content.toLowerCase();
                var articleUrl = decodeURIComponent(data.url);
                var indexOfTitle = [];
                var indexOfContent = [];
                // only match articles with not empty titles
                if(title != '') {
                  keywords.forEach(function(keyword) {
                    indexOfTitle = indexOfTitle.concat(getIndexByWord(keyword, titleInLowerCase, false));
                    indexOfContent = indexOfContent.concat(getIndexByWord(keyword, contentInLowerCase, false));
                  });
                  if (indexOfTitle.length > 0 || indexOfContent.length > 0) {
                    isMatch = true;
                    hitCount = indexOfTitle.length + indexOfContent.length;
                  }
                }

                // show search results

                if (isMatch) {
                  // Sort hits by descending position so that pop() always
                  // yields the earliest remaining hit; among hits at the same
                  // position the longest word is popped first.

                  [indexOfTitle, indexOfContent].forEach(function (index) {
                    index.sort(function (itemLeft, itemRight) {
                      if (itemRight.position !== itemLeft.position) {
                        return itemRight.position - itemLeft.position;
                      } else {
                        return itemLeft.word.length - itemRight.word.length;
                      }
                    });
                  });

                  // merge hits into slices

                  // Consume (pop) from `index` every hit that ends at or before
                  // `end`, dropping hits that overlap one already taken, and
                  // return them as one slice. Also adds the number of
                  // whole-query hits to the article's searchTextCount.
                  // (`text` is accepted for signature symmetry but not read.)
                  var mergeIntoSlice = function (text, start, end, index) {
                    var item = index[index.length - 1];
                    var position = item.position;
                    var word = item.word;
                    var hits = [];
                    var searchTextCountInSlice = 0;
                    while (position + word.length <= end && index.length != 0) {
                      if (word === searchText) {
                        searchTextCountInSlice++;
                      }
                      hits.push({position: position, length: word.length});
                      var wordEnd = position + word.length;

                      // move to next position of hit

                      index.pop();
                      while (index.length != 0) {
                        item = index[index.length - 1];
                        position = item.position;
                        word = item.word;
                        if (wordEnd > position) {
                          index.pop();
                        } else {
                          break;
                        }
                      }
                    }
                    searchTextCount += searchTextCountInSlice;
                    return {
                      hits: hits,
                      start: start,
                      end: end,
                      searchTextCount: searchTextCountInSlice
                    };
                  };

                  var slicesOfTitle = [];
                  if (indexOfTitle.length != 0) {
                    slicesOfTitle.push(mergeIntoSlice(title, 0, title.length, indexOfTitle));
                  }

                  var slicesOfContent = [];
                  while (indexOfContent.length != 0) {
                    var item = indexOfContent[indexOfContent.length - 1];
                    var position = item.position;
                    var word = item.word;
                    // cut out 100 characters
                    var start = position - 20;
                    var end = position + 80;
                    if(start < 0){
                      start = 0;
                    }
                    if (end < position + word.length) {
                      end = position + word.length;
                    }
                    if(end > content.length){
                      end = content.length;
                    }
                    var remainingBefore = indexOfContent.length;
                    var contentSlice = mergeIntoSlice(content, start, end, indexOfContent);
                    if (contentSlice.hits.length != 0) {
                      slicesOfContent.push(contentSlice);
                    }
                    // Hit positions come from the lower-cased text, and
                    // toLowerCase() can change a string's length (e.g. U+0130),
                    // so a hit may lie beyond `content.length`. In that case
                    // nothing is consumed above; drop the hit so the loop is
                    // guaranteed to terminate instead of hanging the page.
                    if (indexOfContent.length === remainingBefore) {
                      indexOfContent.pop();
                    }
                  }

                  // sort slices in content by search text's count and hits' count

                  slicesOfContent.sort(function (sliceLeft, sliceRight) {
                    if (sliceLeft.searchTextCount !== sliceRight.searchTextCount) {
                      return sliceRight.searchTextCount - sliceLeft.searchTextCount;
                    } else if (sliceLeft.hits.length !== sliceRight.hits.length) {
                      return sliceRight.hits.length - sliceLeft.hits.length;
                    } else {
                      return sliceLeft.start - sliceRight.start;
                    }
                  });

                  // select top N slices in content
                  // NOTE(review): the literal looks like a value substituted
                  // when the page was generated - confirm against the theme.

                  var upperBound = parseInt('1', 10);
                  if (upperBound >= 0) {
                    slicesOfContent = slicesOfContent.slice(0, upperBound);
                  }

                  // highlight title and content

                  var resultItem = '';

                  if (slicesOfTitle.length != 0) {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + highlightKeyword(title, slicesOfTitle[0]) + "</a>";
                  } else {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + title + "</a>";
                  }

                  slicesOfContent.forEach(function (slice) {
                    resultItem += "<a href='" + articleUrl + "'>" +
                      "<p class=\"search-result\">" + highlightKeyword(content, slice) +
                      "...</p>" + "</a>";
                  });

                  resultItem += "</li>";
                  resultItems.push({
                    item: resultItem,
                    searchTextCount: searchTextCount,
                    hitCount: hitCount,
                    id: resultItems.length
                  });
                }
              });
            }
            if (keywords.length === 1 && keywords[0] === "") {
              // empty query: show the idle "search" icon
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-search fa-5x"></i></div>';
            } else if (resultItems.length === 0) {
              // nothing matched: show the "no result" icon
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-frown-o fa-5x"></i></div>';
            } else {
              // Rank articles: whole-query hits first, then total hits, then
              // the article that appears later in the index.
              resultItems.sort(function (resultLeft, resultRight) {
                if (resultLeft.searchTextCount !== resultRight.searchTextCount) {
                  return resultRight.searchTextCount - resultLeft.searchTextCount;
                } else if (resultLeft.hitCount !== resultRight.hitCount) {
                  return resultRight.hitCount - resultLeft.hitCount;
                } else {
                  return resultRight.id - resultLeft.id;
                }
              });
              var searchResultList = '<ul class=\"search-result-list\">';
              resultItems.forEach(function (result) {
                searchResultList += result.item;
              });
              searchResultList += "</ul>";
              resultContent.innerHTML = searchResultList;
            }
          };

          // NOTE(review): the constant comparison looks like a trigger mode
          // substituted when the page was generated - confirm against the theme.
          if ('auto' === 'auto') {
            // search as the user types
            input.addEventListener('input', inputEventFunction);
          } else {
            // search only on icon click or Enter
            $('.search-icon').click(inputEventFunction);
            input.addEventListener('keypress', function (event) {
              if (event.keyCode === 13) {
                inputEventFunction();
              }
            });
          }

          // remove loading animation
          removeLoadingOverlay();

          proceedsearch();
        },
        error: function() {
          // The index could not be loaded: without this the spinner overlay
          // would stay on screen and the page would remain unscrollable.
          removeLoadingOverlay();
        }
      });
    };

    // Clicking a popup trigger opens the search UI. The search index is
    // loaded by searchFunc while `isfetched` is still false; afterwards the
    // popup is simply shown again.
    $('.popup-trigger').click(function (clickEvent) {
      clickEvent.stopPropagation();
      if (isfetched !== false) {
        proceedsearch();
        return;
      }
      searchFunc(path, 'local-search-input', 'local-search-result');
    });

    // The close button dismisses the popup.
    $('.popup-btn-close').click(onPopupClose);

    // Clicks inside the popup are not propagated to ancestor handlers.
    $('.popup').click(function (clickEvent) {
      clickEvent.stopPropagation();
    });

    // Escape (key code 27) dismisses the popup while it is visible.
    $(document).on('keyup', function (keyEvent) {
      if (keyEvent.which !== 27) {
        return;
      }
      if ($('.search-popup').is(':visible')) {
        onPopupClose();
      }
    });
  </script>





  

  

  

  
  

  
  
    <script type="text/x-mathjax-config">
      // tex2jax settings handed to MathJax.Hub.Config:
      //  - inlineMath: inline formulas are delimited by $...$ or \(...\)
      //  - processEscapes: enabled (per the MathJax tex2jax documentation this
      //    lets a backslash-escaped dollar sign stand for a literal "$")
      //  - skipTags: elements whose content is listed as excluded from processing
      MathJax.Hub.Config({
        tex2jax: {
          inlineMath: [
            ['$', '$'],
            ['\\(', '\\)']
          ],
          processEscapes: true,
          skipTags: [
            'script',
            'noscript',
            'style',
            'textarea',
            'pre',
            'code'
          ]
        }
      });
    </script>

    <script type="text/x-mathjax-config">
      // Queue a callback on the MathJax hub that appends the "has-jax" class
      // to the parent node of every math source element reported by
      // MathJax.Hub.getAllJax().
      MathJax.Hub.Queue(function() {
        var jaxElements = MathJax.Hub.getAllJax();
        var idx = 0;
        while (idx < jaxElements.length) {
          var host = jaxElements[idx].SourceElement().parentNode;
          host.className = host.className + ' has-jax';
          idx += 1;
        }
      });
    </script>
    <!-- MathJax loader. The scheme is explicit: a protocol-relative URL fails
         when the page is opened from file:// and follows http on insecure pages.
         NOTE(review): third-party script loaded without an integrity hash -
         confirm this CDN host is still trusted and consider adding SRI. -->
    <script type="text/javascript" src="https://cdn.bootcss.com/mathjax/2.7.1/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
  


  

  

</body>
</html>
